library(ape)
library(mpqDist)
library(plotly)
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
library(phyloseq)
library(tidyverse)
#> ── Attaching core tidyverse packages ────────────────────────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ lubridate 1.9.4 ✔ tibble 3.2.1
#> ✔ purrr 1.0.2 ✔ tidyr 1.3.1
#> ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks plotly::filter(), stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ✖ dplyr::where() masks ape::where()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(viridis)
#> Loading required package: viridisLite
library(phangorn)
#>
#> Attaching package: 'phangorn'
#>
#> The following object is masked from 'package:plotly':
#>
#> add_boxplot
library(patchwork)
library(treeDA)
library(ggbrace)
data(gentry)
gentry
#> phyloseq-class experiment-level object
#> otu_table() OTU Table: [ 585 taxa and 197 samples ]
#> sample_data() Sample Data: [ 197 samples by 4 sample variables ]
#> tax_table() Taxonomy Table: [ 585 taxa by 4 taxonomic ranks ]
#> phy_tree() Phylogenetic Tree: [ 585 tips and 349 internal nodes ]
We’re going to test whether our null means and variances look right. To do this, we will generate “OTU tables” that are independent normal random variables, with potentially different standard deviations. We’ll use the gentry data as a skeleton (same tree, same number of samples and sites, same standard deviations for the species when we look into changing standard deviations).
sim_homo = gentry
sim_hetero = gentry
n = nsamples(gentry)
p = ntaxa(gentry)
sds = apply(log(1 + otu_table(gentry)), 2, sd)
sim_homo_matrix = matrix(rnorm(n = n * p), nrow = n, ncol = p)
rownames(sim_homo_matrix) = sample_names(gentry)
colnames(sim_homo_matrix) = taxa_names(gentry)
sim_hetero_matrix = sweep(sim_homo_matrix, MARGIN = 2, STATS = sds, FUN = '*')
otu_table(sim_homo) = otu_table(sim_homo_matrix, taxa_are_rows = FALSE)
otu_table(sim_hetero) = otu_table(sim_hetero_matrix, taxa_are_rows = FALSE)
base_sample = 1
nmv = get_null_mean_and_variance(sim_homo)
mpq_distances = get_mpq_distances(otu_table(sim_homo), phy_tree(sim_homo))
animation_df = make_animation_df(mpq_distances, get_avg_distances_to_set, means_and_vars = nmv, base_sample, sample_data(sim_homo))
p = ggplot(aes(x = Lat, y = avg_dist, color = Elev), data = animation_df) +
geom_point(aes(frame = frame)) +
geom_hline(aes(yintercept = null_mean, frame = frame)) +
geom_hline(aes(yintercept = null_mean - 2 * null_sd, frame = frame)) +
geom_hline(aes(yintercept = null_mean + 2 * null_sd, frame = frame)) +
stat_smooth(aes(frame = frame), se = FALSE) +
scale_x_reverse() +
scale_color_viridis()
ggplotly(p)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
p = ggplot(aes(x = Lat, y = avg_dist, color = Elev), data = animation_df) +
geom_point(aes(frame = frame)) +
geom_hline(aes(yintercept = median, frame = frame)) +
geom_hline(aes(yintercept = upper, frame = frame)) +
geom_hline(aes(yintercept = lower, frame = frame)) +
stat_smooth(aes(frame = frame), se = FALSE) +
scale_x_reverse() +
scale_color_viridis()
ggplotly(p)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'